####################################
### Main Text Replication Code
### Slanted Narratives, Social Media, and Foreign Influence in Libya
### Grossman, Jonsson, Lyon, & Sizer
####################################

# libraries and data loading ----------------------------------------------

### libraries needed for replication
library(tidyverse)
library(estimatr)
library(texreg)
library(rio)
library(rcompanion)
library(ggplot2)
library(ggthemes)
library(tmap)
library(xtable)

### set file path
root <- "~/Dropbox (Personal)/Libya and Information/main analysis code/replication" # add path to folder where main replication dataset (libya_narratives.csv) is located

### set wd to replication folder, for output to be saved to
setwd(root)

### load main replication dataset
libya_data <- import(paste0(root, "/libya_narratives.csv"))

### load validation data
valid_data <- import(paste0(root, "/validation.csv"))


# construct slant measure -------------------------------------------------

### slant = (gna total - laaf total) / (unigrams + bigrams)

libya_data <- libya_data %>%
  mutate(gna_total = gna_unigrams + gna_bigrams,
         laaf_total = lna_unigrams + lna_bigrams,
         gna_slant = (gna_total - laaf_total) / (unigrams_total + bigrams_total),
         slant_std = scale(gna_slant))


# figure 2: validation ----------------------------------------------------

### variables for validation
### hand_code = numeric value associated with hand coding, as described in Figure 2 caption
### bucket = slant category (gna_slant < 0 == LAAF; gna_slant = 0 == Neutral; gna_slant > 0 == GNA)

### construct 95 and 99 percent CIs fot hand code measure by each slant category
valid95 <- groupwiseMean(hand_code ~ bucket, data = valid_data, conf = 0.95)
valid99 <- groupwiseMean(hand_code ~ bucket, data = valid_data, conf = 0.99)

# create plot (y axis)
ggplot() + 
  geom_errorbar(data = valid99, 
                aes(x = reorder(bucket, Mean), 
                    ymin = Trad.lower, ymax = Trad.upper), 
                width = 0, size = 1.5) +
  geom_errorbar(data = valid95, 
                aes(x = reorder(bucket, Mean), 
                    ymin = Trad.lower, ymax = Trad.upper), 
                width = 0, size = 3) +
  geom_point(data = valid95, aes(x = reorder(bucket, Mean), y = Mean),
             size = 2) +
  geom_hline(yintercept = 0, lty = 'dashed', color = 'red') + 
  scale_y_continuous(labels=c("-2" = "Strongly Anti-GNA", 
                              "-1" = "Somewhat Anti-GNA",
                              "0" = "Neutral",
                              "1" = "Somewhat Pro-GNA",
                              "2" = "Strongly Pro-GNA")) + 
  scale_x_discrete(labels=c("LAAF" = "LAAF \n (n = 33)",
                            "Neutral" = "Neutral \n (n = 397)",
                            "GNA" = "GNA \n (n = 56)")) + 
  labs(x = "\n GNA Slant Category", 
       y = "Manual Coding Category \n") +
  coord_cartesian(ylim = c(-2, 2)) +
  theme_bw() + 
  theme(axis.text.y = element_text(angle = 30, vjust = 0.5, size = 14),
        axis.text.x = element_text(size = 14),
        axis.title.y = element_text(size = 16, face = 'bold'),
        axis.title.x = element_text(size = 16, face = 'bold')) + 
  ggsave("valid_plot.pdf", device = "pdf")



# figure 3: number of posts by country ------------------------------------

### bring in map file from tmap library
data("World")

### aggregate posts to country level and merge with World data

map_input <- libya_data %>%
  # change country names for merge with World data
  mutate(plurality = case_when(
    plurality == "US" ~ "United States of America",
    plurality == "UK" ~ "United Kingdom",
    plurality == "UAE" ~ "United Arab Emirates",
    TRUE ~ as.character(plurality)
  )) %>%
  # rename for merge
  dplyr::rename(sovereignt = plurality) %>%
  group_by(sovereignt) %>%
  dplyr::summarise(n = n()) %>%
  left_join(World, c_level, by = "sovereignt") %>%
  left_join(World, ., by = 'sovereignt')

### create map of number of posts by country
pdf("map_number_posts_total.pdf")
tm_shape(map_input) + 
  tm_polygons("n", palette = "Reds", style = "fixed", title = "Number Posts", colorNA = "white", 
              breaks = c(0, 100, 200, 500, 1000, 2000, 7926)) # 7926 is max in dataset
dev.off()

# table 2: descriptive statistics -----------------------------------------

### list the relevant countries for table of descriptive statistics
keep_countries <- c('Libya', 'Egypt', 'France', 'Qatar', 'Russia', 'Saudi Arabia', 'Turkey', 'UAE', 'US', 'Location hidden')

### obtain descriptive statistics for these countries
keep_countries_stats <- libya_data %>% group_by(plurality) %>%
  dplyr::summarise(n = n(),
                   gna_mean = mean(gna_total, na.rm = T),
                   gna_sd = sd(gna_total, na.rm = T),
                   lna_mean = mean(laaf_total, na.rm = T),
                   lna_sd = sd(laaf_total, na.rm = T),
                   slant_mean = mean(gna_slant, na.rm = T),
                   slant_sd = sd(gna_slant, na.rm = T)) %>%
  arrange(., desc(n)) %>%
  filter(plurality %in% keep_countries) %>%
  mutate_if(is.double, round, digits = 3)

### obtain overall descriptive statistics for all posts in dataset
overall <- c("All posts", 
             nrow(libya_data), round(mean(libya_data$gna_total, na.rm = T), 3), 
             round(sd(libya_data$gna_total, na.rm = T), 3), 
             round(mean(libya_data$laaf_total, na.rm = T), 3),
             round(sd(libya_data$laaf_total, na.rm = T), 3),
             round(mean(libya_data$gna_slant, na.rm = T), 3),
             round(sd(libya_data$gna_slant, na.rm = T), 3))

### bring all descriptive statistics into single table
desc_stats <- rbind(overall, keep_countries_stats) 
print(xtable(desc_stats), include.rownames = F)

# table 3: coefficients from separate regressions --------

### create data frame for regressions with plurality country indicators
reg_df <- libya_data %>%
  mutate(libya = ifelse(plurality == "Libya", 1, 0),
         egypt = ifelse(plurality == "Egypt", 1, 0),
         france = ifelse(plurality == "France", 1, 0),
         qatar = ifelse(plurality == "Qatar", 1, 0),
         russia = ifelse(plurality == "Russia", 1, 0),
         saudi = ifelse(plurality == "Saudi Arabia", 1, 0),
         turkey = ifelse(plurality == "Turkey", 1, 0),
         uae = ifelse(plurality == "UAE", 1, 0),
         us = ifelse(plurality == "US", 1, 0),
         hidden = ifelse(plurality == "Location hidden", 1, 0))

### separate regressions for standardized slant measure on each country indicator
### coefficient and standard error for plurality country from each regression
### is included in table 3 of main text
slant_libya <- lm_robust(slant_std ~ libya, data = reg_df,
                         fixed_effects = language, clusters = page_id)
slant_egypt <- lm_robust(slant_std ~ egypt, data = reg_df,
                         fixed_effects = language, clusters = page_id)
slant_france <- lm_robust(slant_std ~ france, data = reg_df,
                          fixed_effects = language, clusters = page_id)
slant_qatar <- lm_robust(slant_std ~ qatar, data = reg_df,
                         fixed_effects = language, clusters = page_id)
slant_russia <- lm_robust(slant_std ~ russia, data = reg_df,
                          fixed_effects = language, clusters = page_id)
slant_saudi <- lm_robust(slant_std ~ saudi, data = reg_df,
                         fixed_effects = language, clusters = page_id)
slant_turkey <- lm_robust(slant_std ~ turkey, data = reg_df,
                          fixed_effects = language, clusters = page_id)
slant_uae <- lm_robust(slant_std ~ uae, data = reg_df,
                       fixed_effects = language, clusters = page_id)
slant_us <- lm_robust(slant_std ~ us, data = reg_df,
                      fixed_effects = language, clusters = page_id)
slant_hidden <- lm_robust(slant_std ~ hidden, data = reg_df,
                          fixed_effects = language, clusters = page_id)

# ### separate regressions for laaf terms on each country indicator
### coefficient and standard error for plurality country from each regression
### is included in table 3 of main text
laaf_libya <- lm_robust(laaf_total ~ libya, data = reg_df,
                       fixed_effects = language, clusters = page_id)
laaf_egypt <- lm_robust(laaf_total ~ egypt, data = reg_df,
                       fixed_effects = language, clusters = page_id)
laaf_france <- lm_robust(laaf_total ~ france, data = reg_df,
                        fixed_effects = language, clusters = page_id)
laaf_qatar <- lm_robust(laaf_total ~ qatar, data = reg_df,
                       fixed_effects = language, clusters = page_id)
laaf_russia <- lm_robust(laaf_total ~ russia, data = reg_df,
                        fixed_effects = language, clusters = page_id)
laaf_saudi <- lm_robust(laaf_total ~ saudi, data = reg_df,
                       fixed_effects = language, clusters = page_id)
laaf_turkey <- lm_robust(laaf_total ~ turkey, data = reg_df,
                        fixed_effects = language, clusters = page_id)
laaf_uae <- lm_robust(laaf_total ~ uae, data = reg_df,
                     fixed_effects = language, clusters = page_id)
laaf_us <- lm_robust(laaf_total ~ us, data = reg_df,
                    fixed_effects = language, clusters = page_id)
laaf_hidden <- lm_robust(laaf_total ~ hidden, data = reg_df,
                        fixed_effects = language, clusters = page_id)

# ### separate regressions for gna terms on each country indicator
### coefficient and standard error for plurality country from each regression
### is included in table 3 of main text
gna_libya <- lm_robust(gna_total ~ libya, data = reg_df,
                       fixed_effects = language, clusters = page_id)
gna_egypt <- lm_robust(gna_total ~ egypt, data = reg_df,
                       fixed_effects = language, clusters = page_id)
gna_france <- lm_robust(gna_total ~ france, data = reg_df,
                        fixed_effects = language, clusters = page_id)
gna_qatar <- lm_robust(gna_total ~ qatar, data = reg_df,
                       fixed_effects = language, clusters = page_id)
gna_russia <- lm_robust(gna_total ~ russia, data = reg_df,
                        fixed_effects = language, clusters = page_id)
gna_saudi <- lm_robust(gna_total ~ saudi, data = reg_df,
                       fixed_effects = language, clusters = page_id)
gna_turkey <- lm_robust(gna_total ~ turkey, data = reg_df,
                        fixed_effects = language, clusters = page_id)
gna_uae <- lm_robust(gna_total ~ uae, data = reg_df,
                     fixed_effects = language, clusters = page_id)
gna_us <- lm_robust(gna_total ~ us, data = reg_df,
                    fixed_effects = language, clusters = page_id)
gna_hidden <- lm_robust(gna_total ~ hidden, data = reg_df,
                        fixed_effects = language, clusters = page_id)
